US census analysis

Use map and R with the US census data. First, libraries.

knitr::opts_chunk$set(echo = TRUE)
library(ggplot2)      # For plotting
library(tidycensus)   # For downloading Census data
library(tmap)         # For creating tmap
library(tmaptools)    # For reading and processing spatial data related to tmap
library(dplyr)        # For data wrangling
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(sf)           # For reading, writing and working with spatial objects
## Linking to GEOS 3.6.1, GDAL 2.1.3, PROJ 4.9.3
library(tidyverse)
## ── Attaching packages ──────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ tibble  2.1.3     ✔ purrr   0.3.2
## ✔ tidyr   0.8.3     ✔ stringr 1.4.0
## ✔ readr   1.3.1     ✔ forcats 0.4.0
## ── Conflicts ─────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()

Add then the api key available at:

census_api_key("e779738acbec44ceefb30886cb8d4a30deed7e0a")
## To install your API key for use in future sessions, run this function with `install = TRUE`.

Download the data

get_asc is part for the tidycensus package.

dat12 <- get_acs("county", table = "B27001", year = 2012, 
                 output = "tidy", state = NULL, geometry = FALSE) %>%
  rename(`2012` = estimate) %>%
  select(-NAME, -moe) 
## Getting data from the 2008-2012 5-year ACS
## Loading ACS5 variables for 2012 from table B27001. To cache this dataset for faster access to ACS tables in the future, run this function with `cache_table = TRUE`. You only need to do this once per ACS dataset.
dat16 <- get_acs("county", table = "B27001", year = 2016, 
                 output = "tidy", state = NULL, geometry = TRUE, shift_geo = TRUE) %>%
  rename(`2016` = estimate) %>%
  select(-moe)
## Getting data from the 2012-2016 5-year ACS
## Using feature geometry obtained from the albersusa package
## Loading ACS5 variables for 2016 from table B27001. To cache this dataset for faster access to ACS tables in the future, run this function with `cache_table = TRUE`. You only need to do this once per ACS dataset.
## Please note: Alaska and Hawaii are being shifted and are not to scale.

Download the data

get_asc is part for the tidycensus package.

# County data
data("county_laea", package = "tidycensus")

# State data
data("state_laea", package = "tidycensus")

Join by geoID

Left join to link features and records

dat <- left_join(dat16, dat12, by = c("GEOID", "variable"))
st_geometry(dat) <- NULL # This drops the geometry and leaves a table

head(dat)
##   GEOID                    NAME   variable  2016  2012
## 1 01001 Autauga County, Alabama B27001_001 54387 53571
## 2 01001 Autauga County, Alabama B27001_002 26392 25794
## 3 01001 Autauga County, Alabama B27001_003  2003  2213
## 4 01001 Autauga County, Alabama B27001_004  1982  2209
## 5 01001 Autauga County, Alabama B27001_005    21     4
## 6 01001 Autauga County, Alabama B27001_006  5119  5174

What is this step?

dat <- mutate(dat,
              cat = case_when(
                variable %in% paste0("B27001_0",
                                     c("09","12","37","40")) ~ "pop1834",
                variable %in% paste0("B27001_0",
                                     c("11","14","39","42")) ~ "pop1834ni")) %>%
  filter(!is.na(cat))
# Create long version
dat <- tidyr::gather(dat, year, estimate, c(`2012`, `2016`))

# Group the data by our new categories and sum
dat <- group_by(dat, GEOID, NAME, year, cat) %>%
  summarize(estimate = sum(estimate)) %>%
  ungroup() %>%
  tidyr::spread(cat, estimate) 

head(dat)
## # A tibble: 6 x 5
##   GEOID NAME                    year  pop1834 pop1834ni
##   <chr> <chr>                   <chr>   <dbl>     <dbl>
## 1 01001 Autauga County, Alabama 2012    10966      2348
## 2 01001 Autauga County, Alabama 2016    11246      1968
## 3 01003 Baldwin County, Alabama 2012    34353     11012
## 4 01003 Baldwin County, Alabama 2016    37178      9639
## 5 01005 Barbour County, Alabama 2012     5040      1918
## 6 01005 Barbour County, Alabama 2016     4996      1504

Download the data

get_asc is part for the tidycensus package.

dat <- mutate(dat, est = (pop1834ni/pop1834) * 100) %>%
  select(-c(pop1834, pop1834ni)) %>%
  tidyr::spread(year, est) %>%
  mutate(diff = `2016`-`2012`)


head(dat)
## # A tibble: 6 x 5
##   GEOID NAME                    `2012` `2016`   diff
##   <chr> <chr>                    <dbl>  <dbl>  <dbl>
## 1 01001 Autauga County, Alabama   21.4   17.5  -3.91
## 2 01003 Baldwin County, Alabama   32.1   25.9  -6.13
## 3 01005 Barbour County, Alabama   38.1   30.1  -7.95
## 4 01007 Bibb County, Alabama      33.0   16.7 -16.3 
## 5 01009 Blount County, Alabama    27.3   20.4  -6.88
## 6 01011 Bullock County, Alabama   20.4   34.5  14.1

Download the data

get_asc is part for the tidycensus package.

datlong <- select(dat, -diff) %>%
  tidyr::gather(year, estimate, c(`2012`, `2016`)) %>%
  group_by(year) %>%
  mutate(med = round(median(estimate, na.rm = TRUE), 1))

ggplot(datlong, aes(estimate)) +
  geom_histogram(fill = "firebrick2", 
                 color = "white", bins = 60) +
  xlab("Uninsured adults ages 18-34 by county (%)") +
  theme(plot.title = element_text(hjust = 0.5)) +
  facet_wrap(~year, ncol = 1) +
  geom_vline(aes(xintercept = med,
                 group = year), lty = "dashed") +
  geom_text(aes(label = paste("Median = ", med), x = med, y = 55))
## Warning: Removed 2 rows containing non-finite values (stat_bin).

Download the data

get_asc is part for the tidycensus package.

d10 <- top_n(dat, 10, diff) %>%
  mutate(type = "Insured population decreased",
         difftemp = diff)

i10 <- top_n(dat, -10, diff) %>%
  mutate(type = "Insured population increased",
         difftemp = abs(diff))

id10 <- bind_rows(list(i10, d10)) %>%
  arrange(desc(difftemp))
ggplot(id10) + 
  geom_col(aes(x = forcats::fct_reorder(NAME, difftemp), 
               y = difftemp, fill = type)) +
  coord_flip() +
  scale_fill_manual(values = c("firebrick2", "cyan4")) +
  theme(plot.title = element_text(hjust = 0.5),
        legend.position = "bottom",
        legend.title = element_blank()) +
  ggtitle("Counties with the greatest change (+/-) in
    insured population, ages 18-34, 2012-2016") +
  ylab("Difference in % insured (2016 - 2012)") +
  xlab("")

Download the data

get_asc is part for the tidycensus package.

# dat16 is our original geographic object and dat is the tabular data
shp <- dat16 %>%
  filter(variable == "B27001_001") %>% # much faster than using distinct()
  select(GEOID, NAME) %>%
  left_join(dat, by = c("GEOID", "NAME")) %>%
  arrange(GEOID) %>%
  rename(uninsured_2012 = `2012`,
         uninsured_2016 = `2016`,
         uninsured_diff = diff)

head(dat)
## # A tibble: 6 x 5
##   GEOID NAME                    `2012` `2016`   diff
##   <chr> <chr>                    <dbl>  <dbl>  <dbl>
## 1 01001 Autauga County, Alabama   21.4   17.5  -3.91
## 2 01003 Baldwin County, Alabama   32.1   25.9  -6.13
## 3 01005 Barbour County, Alabama   38.1   30.1  -7.95
## 4 01007 Bibb County, Alabama      33.0   16.7 -16.3 
## 5 01009 Blount County, Alabama    27.3   20.4  -6.88
## 6 01011 Bullock County, Alabama   20.4   34.5  14.1
# Remove the Aleutians West from shp for display purposes.
# NOTE: this isn't necessary since I'm using the shift_geo
# argument in the get_acs function. However if you're not
# using shift_geo or joining to a different spatial layer
# for the full US you may want to consider removing this 
# record for display purposes.
shp <- filter(shp, GEOID != "02016")

Download the data

get_asc is part for the tidycensus package.

# Define the shape and the layer elements
tm_shape(shp) +
  tm_polygons()

tm_shape(shp) +
  tm_polygons("uninsured_2012")

tm_shape(shp) +
  tm_bubbles("uninsured_2012")

# Create a simple geographic object with one point
dat <- data.frame(c("Empire State Building"), 
                  lat = c(40.748595), 
                  long = c(-73.985718))
sites <- sf::st_as_sf(dat, coords = c("long", "lat"), 
                      crs = 4326, 
                      agr = "identity")

tm_shape(shp) +
  tm_polygons() +
  tm_shape(sites) +
  tm_dots(size = 2)

tm_shape(shp, projection = "wintri") +
  tm_polygons()
## OGR: Corrupt data
## OGR: Corrupt data

## OGR: Corrupt data
## OGR: Corrupt data
# Simplify shapes with tmap function
shpsimp <- simplify_shape(shp, fact = 0.05)
## Registered S3 method overwritten by 'crul':
##   method                 from
##   as.character.form_file httr
## Registered S3 method overwritten by 'geojsonlint':
##   method         from 
##   print.location dplyr

Download the data

get_asc is part for the tidycensus package.

var <- "uninsured_2012"

tm_shape(shp, projection = 2163) +
  tm_polygons(var,
              style = "quantile",
              palette = "BuPu") +
  tm_legend(legend.position = c("left", "bottom"))

Download the data

get_asc is part for the tidycensus package.

cuts <- c(0, 10, 20, 30, 40, 100)

tm_shape(shp, projection = 2163) +
  tm_polygons(var,
              breaks = cuts,
              palette = "BuPu", 
              border.col = "white", 
              border.alpha = 0.5) +
  tm_legend(legend.position = c("left", "bottom"))

Download the data

get_asc is part for the tidycensus package.

tm_shape(shp, projection = 2163) +
  tm_polygons(var,
              breaks = cuts,
              palette = "seq", 
              border.col = "white", 
              border.alpha = 0.5) +
  tm_legend(legend.position = c("left", "bottom"))

Download the data

get_asc is part for the tidycensus package.

tm_shape(shp, projection = 2163) +
  tm_polygons(var,
              breaks = cuts,
              palette = "-BuPu", 
              border.col = "white", 
              border.alpha = 0.5) +
  tm_legend(legend.position = c("left", "bottom"))

Download the data

get_asc is part for the tidycensus package.

mycols <- c("#f0f4c3", "#dce775", "#cddc39", "#afb42b", "#827717")

tm_shape(shp, projection = 2163) +
  tm_polygons(var,
              breaks = cuts,
              palette = mycols, 
              border.col = "white", 
              border.alpha = 0.5) +
  tm_legend(legend.position = c("left", "bottom"))

Download the data

get_asc is part for the tidycensus package.

mymap <- tm_shape(shp, projection = 2163) +
  tm_polygons(var,
              breaks = cuts,
              palette = "BuPu", 
              border.col = "white", 
              border.alpha = 0.5,
              title = "Uninsured (%)") +
  tm_legend(legend.position = c("left", "bottom")) +
  tm_layout(title = "Uninsured adults ages 18-34 by county, 2012",
            title.size = 1.1,
            title.position = c("center", "top"))

mymap

Download the data

get_asc is part for the tidycensus package.

mymap + tm_layout(inner.margins = c(0.06, 0.10, 0.10, 0.08))

Download the data

get_asc is part for the tidycensus package.

mymap + 
  tm_scale_bar() + 
  tm_compass()

Download the data

get_asc is part for the tidycensus package.

# Add unit argument to tm_shape
#tm_shape(shp, projection = 2163, unit = "mi")

Download the data

get_asc is part for the tidycensus package.

# Customize scale bar and north arrow
mymap + tm_scale_bar(color.dark = "gray60",
                     position = c("right", "bottom")) + 
  tm_compass(type = "4star", size = 2.5, fontsize = 0.5,
             color.dark = "gray60", text.color = "gray60",
             position = c("left", "top"))
## Warning: The argument fontsize of tm_compass is deprecated. It has been
## renamed to text.size

Download the data

get_asc is part for the tidycensus package.

# Create a state FIPS field 
shp <- mutate(shp, STFIPS = stringr::str_sub(GEOID, 1, 2))

# Aggregate the county layer by state 
states <- shp %>%
  aggregate_map(by = "STFIPS")
## Warning: This function is deprecated and has been migrated to github.com/
## mtennekes/oldtmaptools
# Add the new state layer
mymap + tm_shape(states) +
  tm_borders(col = "black")

Download the data

get_asc is part for the tidycensus package.

createMap <- function(.data, varname, statecol, maptitle){
  
  tm_shape(.data, projection = 2163, unit = "mi") +
    tm_polygons(varname,
                breaks = cuts,
                palette = "BuPu", 
                border.col = "white", 
                border.alpha = 0.5,
                title = "Uninsured (%)") +
    tm_legend(legend.position = c("left", "bottom")) +
    tm_layout(title = maptitle,
              title.size = 1.1,
              title.position = c("center", "top"),
              inner.margins = c(0.06, 0.10, 0.10, 0.08),
              frame = FALSE) +
    tm_scale_bar(color.dark = "gray60",
                 position = c("right", "bottom")) + 
    tm_compass(type = "4star", size = 2.5, fontsize = 0.5,
               color.dark = "gray60", text.color = "gray60",
               position = c("left", "top")) +
    tm_shape(states) +
    tm_borders(col = statecol)
  
}

Download the data

get_asc is part for the tidycensus package.

m1 <- createMap(shp, 
                varname = "uninsured_2012", 
                statecol = "green", 
                maptitle = "Here is title 1")
## Warning: The argument fontsize of tm_compass is deprecated. It has been
## renamed to text.size
m2 <- createMap(shp, 
                varname = "uninsured_2016", 
                statecol = "yellow",
                maptitle = "Here is title 2")
## Warning: The argument fontsize of tm_compass is deprecated. It has been
## renamed to text.size
tmap_arrange(m1, m2, ncol = 1)

Download the data

get_asc is part for the tidycensus package.

var2 <- c("uninsured_2012", "uninsured_2016")
title2 <- c("Uninsured adults ages 18-34 by county, 2012", 
            "Uninsured adults ages 18-34 by county, 2016")

createMap(shp, 
          varname = var2, 
          statecol = "black", 
          maptitle = title2)
## Warning: The argument fontsize of tm_compass is deprecated. It has been
## renamed to text.size

Download the data

get_asc is part for the tidycensus package.

shplong <- select(shp, GEOID, NAME, uninsured_2012, uninsured_2016) %>%
  tidyr::gather(year, estimate, c(uninsured_2012, uninsured_2016)) %>%
  arrange(GEOID)

glimpse(shplong)
## Observations: 6,282
## Variables: 5
## $ GEOID    <chr> "01001", "01001", "01003", "01003", "01005", "01005", "…
## $ NAME     <chr> "Autauga County, Alabama", "Autauga County, Alabama", "…
## $ year     <chr> "uninsured_2012", "uninsured_2016", "uninsured_2012", "…
## $ estimate <dbl> 21.41164, 17.49956, 32.05542, 25.92662, 38.05556, 30.10…
## $ geometry <MULTIPOLYGON [m]> MULTIPOLYGON (((1269841 -13..., MULTIPOLYG…
## Observations: 6,282
## Variables: 5
## $ GEOID    <chr> "01001", "01001", "01003", "01003", "01005", "01005",...
## $ NAME     <chr> "Autauga County, Alabama", "Autauga County, Alabama",...
## $ year     <chr> "uninsured_2012", "uninsured_2016", "uninsured_2012",...
## $ estimate <dbl> 21.41164, 17.49956, 32.05542, 25.92662, 38.05556, 30....
## $ geometry <sf_geometry [m]> MULTIPOLYGON (((1269841 -13..., MULTIPOLY...
mymap <- tm_shape(shplong, projection = 2163) +
  tm_polygons("estimate",
              breaks = cuts,
              palette = "BuPu", border.col = "white", 
              border.alpha = 0.3,
              title = "Uninsured (%)") +
  tm_facets(by = "year", free.coords = TRUE, ncol = 1) +
  tm_shape(states) +
  tm_borders(col = "black")

Download the data

get_asc is part for the tidycensus package.

# Filter to Texas and create some additional
# fields for mapping. Since we'll be using
tx <- filter(shp, STFIPS == "48") %>%
  mutate(NAME = stringr::str_remove(NAME, ", Texas"),
         `Insured Adults Ages 18-34, 2012-2016` = case_when(
           uninsured_diff < 0  ~ "Insured population increased",
           uninsured_diff > 0  ~ "Insured population decreased",
           uninsured_diff == 0 ~ "Insured population stayed the same"),
         diff2 = round(abs(uninsured_diff), 1),
         popup = ifelse(uninsured_diff != 0,
                        paste0(`Insured Adults Ages 18-34, 2012-2016`, " by ", diff2, "%"), 
                        `Insured Adults Ages 18-34, 2012-2016`),
         diffrad = as.numeric(cut(diff2, c(0, 5, 10, 20, 30, 45), 
                                  right = FALSE)))

# Remove some unnecessary fields
tx <- select(tx, -c(uninsured_2012, uninsured_2016, uninsured_diff, STFIPS))

Download the data

get_asc is part for the tidycensus package.

# Basemap
carto <- "https://cartodb-basemaps-{s}.global.ssl.fastly.net/light_all/{z}/{x}/{y}{r}.png"

# Create a "normal" tmap except we'll add  
# the basemap and the `popup.vars` argument.
# The symbol size of the bubbles will be
# based on the data so use our calculated
# field `diffrad` which will apply sizes
# 1 through 5. Sizes can be further adjusted
# using the `scale` argument.

mymap <- tm_basemap(carto) +  
  tm_shape(tx) +
  tm_borders(col = "azure2") +
  tm_bubbles("diffrad", 
             col = "Insured Adults Ages 18-34, 2012-2016", 
             border.col = "white", 
             scale = 1.5,
             style = "fixed",
             palette = c("coral2", "aquamarine3", "gray"),
             popup.vars = c("County: " = "NAME", "Change: " = "popup"))

tmap_leaflet(mymap)
## Legend for symbol sizes not available in view mode.

Download the data

get_asc is part for the tidycensus package.

# Load the raster data
data(land)

# Remove AK and HI
shp_rmAKHI <- filter(shp, !STFIPS %in% c("02", "15"))

Download the data

get_asc is part for the tidycensus package.

# Create a color palette for land cover
# This was taken directly from the tmap documentation
pal20 <- c("#003200", "#3C9600", "#006E00", "#556E19", "#00C800", 
           "#8CBE8C", "#467864", "#B4E664", "#9BC832", "#EBFF64", "#F06432", 
           "#9132E6", "#E664E6", "#9B82E6", "#B4FEF0", "#646464", "#C8C8C8", 
           "#FF0000", "#FFFFFF", "#5ADCDC")

# Map the raster data and assign the bounding box of
# the county layer. Add the county layer on top.
tm_shape(land, bbox = shp_rmAKHI) +
  tm_raster("cover", palette = pal20, alpha = 0.8) +
  tm_shape(shp_rmAKHI) + 
  tm_borders(alpha = 0.4, col = "black") +
  tm_layout(inner.margins = c(0.06, 0.10, 0.10, 0.08)) +
  tm_layout(legend.position = c("left","bottom"))